RRPlot and the Colon data set

Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
#source("~/GitHub/FRESA.CAD/R/PoissonEventRiskCalibration.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

The data set

data(cancer)
colon <- subset(colon,etype==1)
colon$etype <- NULL
rownames(colon) <- colon$id
colon$id <- NULL
colon <- colon[complete.cases(colon),]
time <- colon$time
status <- colon$status
data <- colon
data$time <- NULL
data$study <- NULL
table(data$status)

0 1 442 446

dataColon <- as.data.frame(model.matrix(status~.*age,data))
dataColon$`(Intercept)` <- NULL
dataColon$time <- time/365
dataColon$status <- status
colnames(dataColon) <-str_replace_all(colnames(dataColon),":","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\.","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\+","_")
data <- NULL

trainsamples <- sample(nrow(dataColon),0.7*nrow(dataColon))
dataColonTrain <- dataColon[trainsamples,]
dataColonTest <- dataColon[-trainsamples,]


pander::pander(table(dataColonTrain$status))
0 1
305 316
pander::pander(table(dataColonTest$status))
0 1
137 130

Modeling

ml <- BSWiMS.model(Surv(time,status)~1,data=dataColonTrain,NumberofRepeats = 10)

[+++++-++++-++++++++++-++++-+++++++++-++++-++++-+++++-]….

sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy
nodes 2.42e-02 1.016 1.024 1.033 0.594
age_nodes 4.82e-04 1.000 1.000 1.001 0.597
node4 1.48e-01 1.100 1.160 1.223 0.593
age_node4 2.09e-03 1.001 1.002 1.003 0.593
rxLev_5FU_age -1.24e-03 0.998 0.999 1.000 0.562
rxLev_5FU -5.84e-02 0.906 0.943 0.982 0.562
extent 7.12e-02 1.017 1.074 1.134 0.544
adhere 3.00e-02 1.004 1.030 1.057 0.525
age_adhere 5.32e-10 1.000 1.000 1.000 0.525
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
nodes 0.516 0.599 0.597 0.512 0.602
age_nodes 0.509 0.597 0.600 0.500 0.600
node4 0.569 0.596 0.597 0.567 0.600
age_node4 0.568 0.591 0.597 0.565 0.595
rxLev_5FU_age 0.595 0.596 0.559 0.599 0.600
rxLev_5FU 0.593 0.591 0.559 0.597 0.596
extent 0.592 0.597 0.538 0.596 0.601
adhere 0.594 0.612 0.531 0.597 0.615
age_adhere 0.509 0.525 0.531 0.500 0.531
  IDI NRI z.IDI z.NRI Delta.AUC Frequency
nodes 0.02890 0.388 5.94 5.22 0.09063 1.0
age_nodes 0.03049 0.401 5.80 5.42 0.10032 1.0
node4 0.03579 0.388 5.45 5.70 0.03338 1.0
age_node4 0.03517 0.388 5.41 5.70 0.03025 1.0
rxLev_5FU_age 0.01131 0.236 3.07 3.15 0.00143 1.0
rxLev_5FU 0.00957 0.236 2.81 3.15 -0.00171 1.0
extent 0.00885 0.150 2.56 2.82 0.00484 1.0
adhere 0.00519 0.125 2.33 2.27 0.01746 0.6
age_adhere 0.00483 0.125 2.22 2.27 0.03134 0.2

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

index <- predict(ml,dataColonTrain)
timeinterval <- 2*mean(subset(dataColonTrain,status==1)$time)

h0 <- sum(dataColonTrain$status & dataColonTrain$time <= timeinterval)
h0 <- h0/sum((dataColonTrain$time > timeinterval) | (dataColonTrain$status==1))

rdata <- cbind(dataColonTrain$status,ppoisGzero(index,h0))

rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90),
                     timetoEvent=dataColonTrain$time,
                     title="Raw Train: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.4610 0.344 0.292 0.27013 0.4997
RR 1.6114 1.623 2.170 1.00000 1.5921
RR_LCI 1.3949 1.389 1.319 0.00000 1.3617
RR_UCI 1.8615 1.897 3.570 0.00000 1.8615
SEN 0.2785 0.560 0.962 1.00000 0.1772
SPE 0.8951 0.685 0.121 0.00656 0.9410
BACC 0.5868 0.623 0.542 0.50328 0.5591
NetBenefit 0.0976 0.204 0.312 0.32829 0.0612
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.98 0.875 1.09 0.759
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.48 1.48 1.46 1.51
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.39 1.39 1.39 1.4
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.642 0.642 0.611 0.674
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.663 0.621 0.706
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.278 0.23 0.331
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.898 0.859 0.93
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.462
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")

Quitting from lines 109-122 (Colon.Rmd) Error in t.default(rrAnalysisTrain$RR_atP) : argument is not a matrix Calls: … eval_with_user_handlers -> eval -> eval -> -> t -> t.default In addition: Warning messages: 1: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 2: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 3: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 4: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 5: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 6: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 7: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 8: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 9: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter

pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 58.848822 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 502 228 274.1 7.74 58.8
class=1 119 88 41.9 50.60 58.8

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataColonTrain,"status","time")

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.703 1.52 2.97

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(dataColonTrain$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90),
                     timetoEvent=dataColonTrain$time,
                     title="Calibrated Train: Colon",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.610 0.474 0.409 0.38132 0.500
RR 1.611 1.623 2.122 1.00000 1.544
RR_LCI 1.395 1.389 1.292 0.00000 1.334
RR_UCI 1.862 1.897 3.485 0.00000 1.787
SEN 0.278 0.560 0.962 1.00000 0.418
SPE 0.895 0.685 0.118 0.00656 0.787
BACC 0.587 0.623 0.540 0.50328 0.602
NetBenefit 0.061 0.146 0.190 0.20817 0.108
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.779 0.695 0.87 4.3e-06
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
0.988 0.988 0.974 1
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.06 1.06 1.05 1.06
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.642 0.643 0.612 0.672
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.663 0.621 0.706
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.278 0.23 0.331
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.898 0.859 0.93
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.611
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")

Quitting from lines 161-174 (Colon.Rmd) Error in t.default(rrAnalysisTrain$RR_atP) : argument is not a matrix Calls: … eval_with_user_handlers -> eval -> eval -> -> t -> t.default In addition: Warning messages: 1: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 2: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 3: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 4: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 5: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 6: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 7: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 8: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 9: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter

pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 58.848822 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 502 228 274.1 7.74 58.8
class=1 119 88 41.9 50.60 58.8

Evaluating on the test set

The calibrated h0 and timeinterval were estimated on the training set

index <- predict(ml,dataColonTest)
rdata <- cbind(dataColonTest$status,ppoisGzero(index,h0))

rrAnalysisTest <- RRPlot(rdata,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=dataColonTest$time,
                     title="Test: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.611 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6083 0.451 0.407 3.93e-01 0.501
RR 1.7584 3.608 8.772 1.98e+01 1.875
RR_LCI 1.4078 2.212 1.305 4.34e-02 1.486
RR_UCI 2.1962 5.886 58.958 9.01e+03 2.367
SEN 0.3231 0.892 0.992 1.00e+00 0.492
SPE 0.8905 0.489 0.117 2.92e-02 0.803
BACC 0.6068 0.691 0.555 5.15e-01 0.648
NetBenefit 0.0701 0.219 0.172 1.65e-01 0.138
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.708 0.591 0.84 3.52e-05
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
0.931 0.932 0.907 0.956
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.07 1.07 1.07 1.08
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.696 0.696 0.651 0.737
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.741 0.682 0.799
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.323 0.244 0.411
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.898 0.834 0.943
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.611
pander::pander(t(rrAnalysisTest$RR_atP),caption="Risk Ratio")

Quitting from lines 196-209 (Colon.Rmd) Error in t.default(rrAnalysisTest$RR_atP) : argument is not a matrix Calls: … eval_with_user_handlers -> eval -> eval -> -> t -> t.default In addition: Warning messages: 1: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 2: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 3: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 4: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 5: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 6: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 7: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 8: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 9: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter

pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 31.486584 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 211 88 110.7 4.64 31.5
class=1 56 42 19.3 26.54 31.5

Cross-Validation

Here we will cross validate the training set and evaluate also on the testing set. The h0 and the timeinterval are the ones estimated on the calibration process

rcv <- randomCV(theData=dataColonTrain,
                theOutcome = Surv(time,status)~1,
                fittingFunction=BSWiMS.model, 
                trainFraction = 0.75,
                repetitions=50,
                classSamplingType = "Pro",
                testingSet=dataColonTest
         )

.[++++].[++++].[+++-].[++++].[++++].[++++–].[++++-].[++++—].[++++–].[++++]10 Tested: 854 Avg. Selected: 6.4 Min Tests: 1 Max Tests: 10 Mean Tests: 4.953162 . MAD: 0.4796788

.[+++].[+++++].[++++-].[+++++].[++++-].[++++].[+++].[++++].[+++++].[++++]20 Tested: 887 Avg. Selected: 6.8 Min Tests: 1 Max Tests: 20 Mean Tests: 9.537768 . MAD: 0.4792116

.[++++].[+++–].[++++].[++++].[++++].[++++].[++–].[++++].[++++–].[+++–]30 Tested: 888 Avg. Selected: 6.666667 Min Tests: 1 Max Tests: 30 Mean Tests: 14.29054 . MAD: 0.4791815

.[++–].[+++–].[++++].[+++++].[++++—].[++++++].[+++].[++++].[+++-].[++++-]40 Tested: 888 Avg. Selected: 6.85 Min Tests: 1 Max Tests: 40 Mean Tests: 19.05405 . MAD: 0.4789083

.[++++-].[+++++].[+++-].[++++].[+++-+-].[+++].[++-].[+++-].[++++].[++-]50 Tested: 888 Avg. Selected: 6.72 Min Tests: 3 Max Tests: 50 Mean Tests: 23.81757 . MAD: 0.4790776

stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]

bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)

rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names

rrAnalysisCVTest <- RRPlot(rdatacv,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=times,
                     title="CV Test: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

CV Test Performance

pander::pander(t(rrAnalysisCVTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.611 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.6109 0.483 0.425 0.369 0.500
RR 1.5886 1.677 2.446 1.000 1.604
RR_LCI 1.4053 1.473 1.681 0.000 1.417
RR_UCI 1.7959 1.909 3.557 0.000 1.815
SEN 0.3229 0.538 0.951 1.000 0.435
SPE 0.8620 0.719 0.176 0.000 0.787
BACC 0.5924 0.629 0.564 0.500 0.611
NetBenefit 0.0543 0.140 0.174 0.211 0.113
pander::pander(t(rrAnalysisCVTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.751 0.683 0.824 2.86e-10
pander::pander(t(rrAnalysisCVTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
0.936 0.936 0.925 0.947
pander::pander(t(rrAnalysisCVTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.02 1.02 1.02 1.02
pander::pander(rrAnalysisCVTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.646 0.646 0.622 0.671
pander::pander(t(rrAnalysisCVTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.668 0.633 0.704
pander::pander((rrAnalysisCVTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.321 0.278 0.366
pander::pander((rrAnalysisCVTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.862 0.826 0.893
pander::pander(t(rrAnalysisCVTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.611
pander::pander(t(rrAnalysisCVTest$RR_atP),caption="Risk Ratio")

Quitting from lines 251-263 (Colon.Rmd) Error in t.default(rrAnalysisCVTest$RR_atP) : argument is not a matrix Calls: … eval_with_user_handlers -> eval -> eval -> -> t -> t.default In addition: Warning messages: 1: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 2: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 3: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 4: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 5: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 6: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 7: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 8: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter 9: In doTryCatch(return(expr), name, parentenv, handler) : “quiet” is not a graphical parameter

pander::pander(rrAnalysisCVTest$surdif,caption="Logrank test")
Logrank test Chisq = 72.028528 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 684 303 370.1 12.2 72
class=1 204 143 75.9 59.3 72